{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7ae81548",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1fa02afa1ae04b9a89d8a74f281c0101",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bfae68a5e22345dfa0dc95745e12755c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Expose only GPU 0 to this process; set here so it is in place before `torch` is imported below.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "from dataclasses import dataclass, field\n",
    "from typing import Optional\n",
    "import contextlib\n",
    "\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "from transformers import (\n",
    "    AutoModelForCausalLM,\n",
    "    AutoTokenizer,\n",
    "    BitsAndBytesConfig,\n",
    "    HfArgumentParser,\n",
    "    AutoTokenizer,\n",
    "    TrainingArguments,\n",
    ")\n",
    "\n",
    "# Hub repo id, kept under its own name so that `model` only ever refers to the loaded model.\n",
    "model_id = \"smangrul/starcoder-personal-copilot\"\n",
    "# NOTE(review): the tokenizer is fetched without `revision` while the model below is pinned\n",
    "# to a commit - confirm whether the tokenizer should be pinned to the same commit.\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_id,\n",
    "    quantization_config=None,\n",
    "    device_map=None,\n",
    "    trust_remote_code=True,\n",
    "    torch_dtype=torch.bfloat16,\n",
    "    revision=\"ded46582a1cdecdddae0c3bc5051a7b967e54029\",\n",
    ")\n",
    "\n",
    "# No `hf_device_map` attribute is taken to mean nothing placed the model on a device yet,\n",
    "# so move it to the GPU explicitly.\n",
    "if not hasattr(model, \"hf_device_map\"):\n",
    "    model.cuda()\n",
    "\n",
    "\n",
    "def get_code_completion(prefix, suffix):\n",
    "    \"\"\"Complete the gap between `prefix` and `suffix` using a fill-in-the-middle prompt.\n",
    "\n",
    "    Args:\n",
    "        prefix: Text placed after the `<fim_prefix>` token (the code before the gap).\n",
    "        suffix: Text placed after the `<fim_suffix>` token (the code after the gap; may be empty).\n",
    "\n",
    "    Returns:\n",
    "        The first decoded sequence from `model.generate`, with special tokens kept.\n",
    "        Decoding is sampled (`do_sample=True`), so repeated calls can differ.\n",
    "    \"\"\"\n",
    "    text = f\"\"\"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>\"\"\"\n",
    "    model.eval()\n",
    "    # Keep the whole encoding so the attention mask reaches `generate`, and follow the\n",
    "    # model's device instead of assuming CUDA.\n",
    "    inputs = tokenizer(text, return_tensors=\"pt\").to(model.device)\n",
    "    outputs = model.generate(\n",
    "        input_ids=inputs[\"input_ids\"],\n",
    "        attention_mask=inputs[\"attention_mask\"],\n",
    "        max_new_tokens=128,\n",
    "        temperature=0.2,\n",
    "        top_k=50,\n",
    "        top_p=0.95,\n",
    "        do_sample=True,\n",
    "        repetition_penalty=1.0,\n",
    "        # Explicit pad token: mirrors the eos fallback that `generate` otherwise applies\n",
    "        # implicitly (with a warning) when no pad token id is given.\n",
    "        pad_token_id=tokenizer.eos_token_id,\n",
    "    )\n",
    "    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "537d2b43",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/sourab/transformers/src/transformers/generation/utils.py:1462: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n",
      "  warnings.warn(\n",
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>from accelerate import Accelerator\n",
      "\n",
      "accelerator = Accelerator()\n",
      "\n",
      "model, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>accelerator.prepare(\n",
      "    model, optimizer, training_dataloader, scheduler\n",
      ")\n",
      "```\n",
      "\n",
      "## Launching your distributed script\n",
      "\n",
      "Once your script is ready, you can launch it in a distributed environment with the following command:\n",
      "\n",
      "```bash\n",
      "accelerate launch {script_name.py} --args_to_the_script\n",
      "```\n",
      "\n",
      "For instance, the following command would launch a training script named `my_script.py` with the argument `num_epochs`:\n",
      "\n",
      "```bash\n",
      "accelerate launch my_script.py --num_epochs=3\n",
      "```\n",
      "\n",
      "You can also\n"
     ]
    }
   ],
   "source": [
    "# FIM prompt: the prefix stops right after `=`, so the completion is the right-hand side.\n",
    "prefix = \"\"\"from accelerate import Accelerator\n",
    "\n",
    "accelerator = Accelerator()\n",
    "\n",
    "model, optimizer, training_dataloader, scheduler = \"\"\"\n",
    "suffix = \"\"  # nothing after the gap\n",
    "\n",
    "print(get_code_completion(prefix=prefix, suffix=suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "28c9db11",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model\n",
      "from transformers import AutoModelForCausalLM\n",
      "\n",
      "peft_config = LoraConfig(<fim_suffix>)<fim_middle>\n",
      "    task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1\n",
      ")\n",
      "\n",
      "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
      "model = get_peft_model(model, peft_config<|endoftext|>\n"
     ]
    }
   ],
   "source": [
    "# FIM prompt: the gap sits between the opening and closing parenthesis of `LoraConfig(...)`.\n",
    "prefix = \"\"\"\\\n",
    "from peft import LoraConfig, TaskType, get_peft_model\n",
    "from transformers import AutoModelForCausalLM\n",
    "\n",
    "peft_config = LoraConfig(\"\"\"\n",
    "suffix = \")\"\n",
    "\n",
    "print(get_code_completion(prefix=prefix, suffix=suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4fd0ae4f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>\n",
      "# Here is the correct implementation of the two sum code exercise\n",
      "# time complexity: O(N)\n",
      "# space complexity: O(N)\n",
      "def two_sum(arr, target_sum):\n",
      "<fim_suffix><fim_middle>    num_idxs = len(arr)\n",
      "    seen_nums = {}\n",
      "\n",
      "    for i in range(num_idxs):\n",
      "        current_num = arr[i]\n",
      "        needed_num = target_sum - current_num\n",
      "\n",
      "        if needed_num in seen_nums:\n",
      "            return [seen_nums[needed_num], i]\n",
      "        else:\n",
      "            seen_nums[current_num] = i\n",
      "\n",
      "\n",
      "# Here is the correct implementation of the three sum code exercise\n",
      "# time complexity: O(N^2)\n",
      "# space complexity: O(N)\n",
      "def three_sum(arr, target_sum\n"
     ]
    }
   ],
   "source": [
    "# FIM prompt: header comments plus the signature; the function body is left to the model.\n",
    "prefix = \"\"\"\n",
    "# Here is the correct implementation of the two sum code exercise\n",
    "# time complexity: O(N)\n",
    "# space complexity: O(N)\n",
    "def two_sum(arr, target_sum):\n",
    "\"\"\"\n",
    "suffix = \"\"  # nothing after the gap\n",
    "\n",
    "print(get_code_completion(prefix=prefix, suffix=suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4c858212",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<fim_prefix>import math\n",
      "import re\n",
      "import warnings\n",
      "from dataclasses import asdict, dataclass, field, replace\n",
      "from enum import Enum\n",
      "from typing import List, Optional, Tuple, Union\n",
      "\n",
      "import torch\n",
      "import torch.nn as nn\n",
      "import torch.nn.functional as F\n",
      "from tqdm import tqdm\n",
      "from transformers.pytorch_utils import Conv1D\n",
      "\n",
      "from..config import PeftConfig\n",
      "from..import_utils import is_bnb_4bit_available, is_bnb_available\n",
      "from..utils import (\n",
      "    CLAMP_QUANTILE,\n",
      "    COMMON_LAYERS_PATTERN,\n",
      "    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,\n",
      "    ModulesToSaveWrapper,\n",
      "    PeftType,\n",
      "    _freeze_adapter,\n",
      "    _get_submodules,\n",
      "    transpose,\n",
      ")\n",
      "from.tuners_utils import BaseTuner, BaseTunerLayer\n",
      "\n",
      "@dataclass\n",
      "class BottleneckAdapterConfig(PeftConfig):\n",
      "    \"\"\"\n",
      "    <fim_suffix>\n",
      "    \"\"\" <fim_middle>This is the configuration class to store the configuration of a [`BottleneckAdapter`].\n",
      "\n",
      "    Args:\n",
      "        in_features (`int`): The number of input features.\n",
      "        out_features (`int`): The number of output features.\n",
      "        expand_ratio (`int`): The expansion ratio of the bottleneck adapter.\n",
      "        hidden_act (`str`, *optional*, defaults to `\"silu\"`): The non-linear activation function (function or string) in the encoder and pooler. If string, `\"gelu\"`, `\"relu\"`, `\"selu\"` and `\"gelu_new\"` are supported.\n",
      "        conv_kernel_size (`int`,\n"
     ]
    }
   ],
   "source": [
    "# FIM prompt for a docstring body: the prefix ends just inside an opened docstring.\n",
    "# The backslash-escaped triple quote near the end is part of the prompt text, not the\n",
    "# end of the Python literal.\n",
    "prefix = \"\"\"import math\n",
    "import re\n",
    "import warnings\n",
    "from dataclasses import asdict, dataclass, field, replace\n",
    "from enum import Enum\n",
    "from typing import List, Optional, Tuple, Union\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from tqdm import tqdm\n",
    "from transformers.pytorch_utils import Conv1D\n",
    "\n",
    "from ..config import PeftConfig\n",
    "from ..import_utils import is_bnb_4bit_available, is_bnb_available\n",
    "from ..utils import (\n",
    "    CLAMP_QUANTILE,\n",
    "    COMMON_LAYERS_PATTERN,\n",
    "    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,\n",
    "    ModulesToSaveWrapper,\n",
    "    PeftType,\n",
    "    _freeze_adapter,\n",
    "    _get_submodules,\n",
    "    transpose,\n",
    ")\n",
    "from .tuners_utils import BaseTuner, BaseTunerLayer\n",
    "\n",
    "@dataclass\n",
    "class BottleneckAdapterConfig(PeftConfig):\n",
    "    \\\"\"\"\n",
    "    \"\"\"\n",
    "\n",
    "# The suffix supplies the closing triple quote of that docstring; the trailing backslash\n",
    "# is a line continuation, so the suffix does not end with a newline.\n",
    "suffix = \"\"\"\n",
    "    \\\"\"\" \\\n",
    "\"\"\"\n",
    "\n",
    "# Print the prompt together with the generated docstring text.\n",
    "print(get_code_completion(prefix, suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8eec4eae",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
